#! /usr/bin/python
#
# Stephen Poletto (spoletto)
# Peter Wilmot (pbwilmot)
# CSCI1580 - Web Search
# Spring 2011 - Brown University
#

import sys

# Decision threshold: a prediction score strictly greater than this value is
# treated as "the document belongs to the class".
EPSILON = 1.0

# Our trusted file of form docID classID
# Build a docID -> classID lookup from it.  The `with` block guarantees the
# handle is closed even when a line fails to parse.
docIDToRealClass = {}
with open('/course/cs158/data/final/training.dat', 'r') as trainingFile:
    for line in trainingFile:
        # split() with no argument tolerates repeated spaces and the trailing
        # newline, so only the first two fields are ever looked at.
        fields = line.split()
        if not fields:
            # Skip blank lines rather than crashing on int('').
            continue
        docID = int(fields[0])
        classID = int(fields[1])
        docIDToRealClass[docID] = classID


# For each class i in 0..10, read prediction<i>.dat (one score per line) and
# count how many documents were classified wrongly with respect to class i.
for i in range(0, 11):
    count_wrong = 0
    # Index of the document the current prediction line refers to.
    # NOTE(review): assumes predictions are listed in docID order starting at
    # 0, one per non-blank line -- TODO confirm against how the prediction
    # files are generated.  A docID missing from the trusted labels raises
    # KeyError, which signals that the two files are misaligned.
    docID = 0
    with open('prediction' + str(i) + '.dat') as predictionFile:
        for line in predictionFile:
            line = line.strip()
            if line == '':
                continue
            predicted_in_class = float(line) > EPSILON
            actually_in_class = docIDToRealClass[docID] == i
            # Wrong either way: claimed the class but it wasn't, or missed a
            # document that really was in the class.
            if predicted_in_class != actually_in_class:
                count_wrong += 1
            # Advance to the next document; without this every prediction
            # would be compared against document 0's label.
            docID += 1
    # Format the count explicitly: concatenating an int to a str raises
    # TypeError.  A single parenthesised argument prints identically under
    # both the print statement and the print function.
    print("%d got %d incorrect." % (i, count_wrong))
    
# For each class i in 0..10, report how many lines svmtraining<i>.dat has and
# how many of those lines contain a '+' or a '-' character.
for i in range(0, 11):
    line_count = 0
    plus_count = 0
    minus_count = 0
    # `with` closes the handle as soon as the file has been scanned.
    with open('svmtraining' + str(i) + '.dat') as svmTrainingFile:
        for line in svmTrainingFile:
            line_count += 1
            # The two checks are independent: a line containing both
            # characters is counted in both tallies.
            if '+' in line:
                plus_count += 1
            if '-' in line:
                minus_count += 1
    # Format the counts explicitly: concatenating an int to a str raises
    # TypeError.  A single parenthesised argument prints identically under
    # both the print statement and the print function.
    print("%d had %d lines %d plusses %d minuesses."
          % (i, line_count, plus_count, minus_count))
    
    
    